package com.ruoyi.common.utils;


/**
 * Minimal Baidu search crawler: fetches a result page and prints result links.
 *
 * @author TT
 * @date 2025/4/2 17:16
 * @description Baidu web crawler
 */
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.*;

public class BaiduCrawler {

    /**
     * Matches Baidu result redirect links (href="http://www.baidu.com/link?url=...").
     * Dots are escaped so "." is literal (the original pattern let "." match any char),
     * and the pattern is compiled once instead of per call.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("href=\"(http://www\\.baidu\\.com/link\\?url=.+?)\"");

    /**
     * Searches Baidu for a fixed query and prints every result redirect link.
     *
     * @param args unused
     * @throws IOException if the HTTP request or response read fails
     */
    public static void main(String[] args) throws IOException {
        String query = "Java 爬虫";
        String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8.name());
        String url = "http://www.baidu.com/s?wd=" + encodedQuery;

        String html = fetch(url);
        for (String link : extractLinks(html)) {
            System.out.println(link);
        }
    }

    /**
     * Performs an HTTP GET and returns the response body as one string
     * (line separators dropped, matching the original behavior).
     *
     * @param url the URL to fetch
     * @return the response body decoded as UTF-8
     * @throws IOException on connection or read failure
     */
    static String fetch(String url) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        try {
            connection.setRequestMethod("GET");
            StringBuilder response = new StringBuilder();
            // Explicit UTF-8: the original used the platform default charset,
            // which garbles UTF-8 pages on platforms like Windows (GBK).
            // try-with-resources closes the reader even if readLine() throws.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    response.append(line);
                }
            }
            return response.toString();
        } finally {
            // Release the underlying connection; the original leaked it.
            connection.disconnect();
        }
    }

    /**
     * Extracts all Baidu redirect links from an HTML page.
     *
     * @param html the page source to scan
     * @return the matched link URLs, in order of appearance (empty if none)
     */
    static List<String> extractLinks(String html) {
        List<String> links = new ArrayList<>();
        Matcher matcher = LINK_PATTERN.matcher(html);
        while (matcher.find()) {
            links.add(matcher.group(1));
        }
        return links;
    }
}
